import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the three raw CSV exports (paths are relative to the working directory).
calender = pd.read_csv("calendar.csv")
listings = pd.read_csv("listings.csv")
reviews = pd.read_csv("reviews.csv")

# Preview the first rows of each table (only rendered in an interactive session).
calender.head()
listings.head()
reviews.head()

# Left-join every calendar row to its listing (calendar.listing_id == listings.id),
# drop the now-redundant `id` key column, and replace every missing value with
# the sentinel -1, which the later price-extraction step filters on.
df1 = (
    calender.merge(listings, how='left', left_on='listing_id', right_on='id')
    .drop(columns='id')
    .fillna(-1)
)
df1.head()
import plotly.offline as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from plotly.subplots import make_subplots
import seaborn as sns
from statistics import mean
def _price_to_int(raw):
    """Convert a price string such as ``"$1,250.00"`` to whole dollars (``1250``).

    Everything after the decimal point is dropped (not rounded) and every
    non-digit character (currency symbol, thousands separator) is ignored.
    Raises ``ValueError`` if no digits remain.
    """
    whole_part = str(raw).split(".")[0]
    return int("".join(ch for ch in whole_part if ch.isdigit()))


# Raw price strings from `price_y` (the right-hand, i.e. listings, side of the
# merge), skipping rows that carry the -1 "missing" sentinel set by fillna(-1).
#
# NOTE(review): the original code also had
#     prices = [i for i in df1['price_x'] if not -1]
# but `not -1` is always False, so that comprehension always produced an empty
# list after a wasted pass over the column. The calendar prices (`price_x`)
# therefore never contributed to the statistics below. That effective behaviour
# is kept here so the downstream plots and reported average are unchanged; if
# calendar prices were meant to be included, extend `prices` with
# `df1['price_x']` values that are `!= -1`.
prices = [raw for raw in df1['price_y'] if raw != -1]

# Whole-dollar integer prices, consumed by the box plot and histogram.
price = [_price_to_int(raw) for raw in prices]

# Mean listing price; statistics.mean raises StatisticsError if `price` is empty.
average = mean(price)
average
# Box plot of the parsed prices: describe the layout first, then the single
# trace, and finally assemble and render the figure.
layout = go.Layout(hovermode='x', title='Boxplot of Price')
trace = go.Box(name='Price', y=price)
data = [trace]
fig = go.Figure(layout=layout, data=data)
fig.show()
It is easy to see that the average price of an Airbnb listing in Seattle is $127.98.
# Canvas for the correlation heatmap.
fig = plt.figure(figsize=(12, 8), dpi=100, facecolor='w', edgecolor='k')

# Correlate only the numeric/bool columns. df1 also holds string columns, and
# DataFrame.corr() no longer drops those silently: pandas >= 2.0 raises on
# them (1.5 emits a FutureWarning). Selecting the columns explicitly works on
# every pandas version.
# NOTE(review): missing values were replaced by -1 upstream, so that sentinel
# takes part in these correlations -- confirm this is intended.
M0 = sns.heatmap(
    df1.select_dtypes(include=['number', 'bool']).corr(),
    annot=True,
).set_title('Airbnb Seattle corresponding correlation')
Based on the colors of the heatmap, it is easy to see that all variables related to reviews and availability, as well as host_listings_count, have a very strong correlation with the price of an Airbnb.
# Distribution of the parsed whole-dollar prices.
fig = px.histogram(data_frame=price)
fig.show()
So it is clear that most Airbnb prices fall between $35 and $175, and that $150 is the most common price.